Verify and extend this, Jesse:
by my calculations, letting \(\hat{L}^{(R)}_d\) denote the resubstitution error rate of the quadratic Bayes plug-in classifier using \(d\) dimensions when \(f_0 = MVN([0,0],I)\) and \(f_1 = MVN([1,0],I)\), conditioning on \(n_0=n_1=n/2\) with \(n=20\) yields \[\mathbb{P}[ \hat{L}^{(R)}_2 > \hat{L}^{(R)}_1 ] \approx 0.18\] and \[\mathbb{P}[ \hat{L}^{(R)}_2 = \hat{L}^{(R)}_1 ] \approx 0.29\] that is, the probability that the second (useless) feature degrades performance, even when testing on the training set, is substantial.
# Monte-Carlo estimate of how often adding pure-noise dimensions increases
# the resubstitution error of the QDA (quadratic Bayes plug-in) classifier.
# NOTE(review): relies on mvtnorm::rmvnorm and a registered foreach parallel
# backend for %dopar% (e.g. doParallel), both loaded elsewhere -- confirm.
mont <- 500
d <- 4
n <- 20
n0 <- n1 <- n/2
# class labels: first n0 observations are class 0, next n1 are class 1
truth <- c(rep(0,n0), rep(1,n1))
mu1 <- rep(0,d)
# class-1 mean differs from class 0 only in the first coordinate;
# coordinates 2..d carry no class information
mu2 <- c(1, rep(0,d-1))
Ls <-
foreach(i = 1:mont, .combine = 'rbind') %dopar% {
set.seed(i)
s0 <- rmvnorm(n0, mean = mu1, sigma = diag(1,d))
s1 <- rmvnorm(n1, mean = mu2, sigma = diag(1,d))
samp_dat <- as.matrix(rbind(s0,s1))
# Lrd: resubstitution error rate of QDA fit on the first x dimensions,
# for x = 1..d, laid out as one row per Monte-Carlo replicate
Lrd <-
Reduce('cbind',
lapply(1:d, function(x){
sum(truth != predict(qda(truth ~ samp_dat[, 1:x]))$class)/length(truth)
}))
colnames(Lrd) <- paste0("Lr", 1:d)
as.data.frame(Lrd)
}

| measurement | value |
|---|---|
| \(\mathbb{P}\left[\hat{L}^{(R)}_2 > \hat{L}^{(R)}_1\right]\) | 0.178 |
| \(\mathbb{P}\left[\hat{L}^{(R)}_2 = \hat{L}^{(R)}_1\right]\) | 0.296 |
| \(\mathbb{P}\left[\hat{L}^{(R)}_3 > \hat{L}^{(R)}_2\right]\) | 0.14 |
| \(\mathbb{P}\left[\hat{L}^{(R)}_3 = \hat{L}^{(R)}_2\right]\) | 0.252 |
| \(\mathbb{P}\left[\hat{L}^{(R)}_4 > \hat{L}^{(R)}_3\right]\) | 0.108 |
| \(\mathbb{P}\left[\hat{L}^{(R)}_4 = \hat{L}^{(R)}_3\right]\) | 0.282 |
| \(\mathbb{P}\left[\hat{L}^{(R)}_3 > \hat{L}^{(R)}_1\right]\) | 0.074 |
| \(\mathbb{P}\left[\hat{L}^{(R)}_3 = \hat{L}^{(R)}_1\right]\) | 0.174 |
| \(\mathbb{P}\left[\hat{L}^{(R)}_4 > \hat{L}^{(R)}_1\right]\) | 0.036 |
| \(\mathbb{P}\left[\hat{L}^{(R)}_4 = \hat{L}^{(R)}_1\right]\) | 0.076 |
Here we chose a sub-sample in which \(\hat{L}^{(R)}_{d} > \hat{L}^{(R)}_{1}\) for \(d = 2,\dots,9\), i.e. replicates where every added noise dimension degrades the resubstitution error.
# Data for the second study: one informative N(+/- mu1, 1) coordinate
# followed by ncDim - 1 pure-noise standard-Cauchy coordinates;
# montc independent replicate data sets of nc observations each.
mu1 <- 1/2
ncDim <- 25
montc <- 500
nc <- 100
nc0 <- nc1 <- nc / 2
truC <- rep(c(0, 1), times = c(nc0, nc1))
sc <- lapply(seq_len(montc), function(rep_i) {
  # informative coordinate: class 0 centered at +mu1, class 1 at -mu1
  tmp1 <- c(rnorm(nc0, mean = mu1, sd = 1), rnorm(nc1, mean = -mu1, sd = 1))
  # heavy-tailed noise coordinates carrying no class information
  tmp2 <- sapply(seq_len(ncDim - 1), function(k) rcauchy(nc, location = 0, scale = 1))
  as.matrix(cbind(tmp1, tmp2))
})
# Resubstitution (Lr) and leave-one-out cross-validated (Ld) QDA error
# rates on the first 100 replicates, using the leading j columns.
qList <-
  foreach(i = 1:100, .combine = 'rbind') %:%
  foreach(j = 1:10, .combine = 'rbind') %dopar% {
    dat <- sc[[i]][, 1:j]
    fit <- qda(truC ~ dat)
    resub_lab <- as.numeric(predict(fit)$class) - 1
    # CV = TRUE returns leave-one-out class assignments
    cv_fit <- qda(truC ~ dat, CV = TRUE)
    cv_lab <- as.numeric(cv_fit$class) - 1
    n_obs <- length(truC)
    Lr <- sum(resub_lab != truC) / n_obs
    Ld <- sum(cv_lab != truC) / n_obs
    rbind(
      data.frame(Type = "Lr", value = Lr, Run = i, Dim = j),
      data.frame(Type = "Ld", value = Ld, Run = i, Dim = j))
  }
# (earlier melt-based reshaping kept for reference)
#m1 <- melt(qList[, 1:2])
#m1 <- melt(qList)
# Boxplots of resubstitution vs CV error rates, faceted by dimension.
p <- ggplot(qList, aes(x = Type, y = value)) +
  geom_boxplot(notch = TRUE, alpha = 0.75) +
  geom_jitter(aes(color = Type), alpha = 0.5, size = 0.5) +
  facet_grid(. ~ Dim) +
  ggtitle("1st normal, then Cauchy")
show(p)
# Captured knitr output from the original run:
## Warning: Removed 500 rows containing non-finite values (stat_boxplot).
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
## Warning: Removed 500 rows containing missing values (geom_point).
# FIX: the extracted source fused two statements onto one line
# ("pairs(...)mu1 <- 1/2"), which is a syntax error -- split them.
# Pairwise scatter of the first five coordinates of replicate 1,
# colored by class, axes clipped to [-2, 2].
pairs(sc[[1]][, 1:5], col = alpha(truC + 1, 0.5), pch = 20, cex = 0.4,
      xlim = c(-2, 2), ylim = c(-2, 2))

# Second experiment: same informative first coordinate, but the noise
# coordinates are low-variance Gaussian instead of Cauchy.
mu1 <- 1/2
# Replicate data sets: informative first column (means +/- mu1) plus
# ncDim - 1 columns of N(0, 1/128) noise.
ncDim <- 25
montc <- 500
nc <- 100
nc0 <- nc1 <- nc / 2
truC <- rep(c(0, 1), times = c(nc0, nc1))
sc <- lapply(seq_len(montc), function(rep_i) {
  tmp1 <- c(rnorm(nc0, mean = mu1, sd = 1), rnorm(nc1, mean = -mu1, sd = 1))
  tmp2 <- sapply(seq_len(ncDim - 1), function(k) rnorm(nc, mean = 0, sd = sqrt(1/128)))
  as.matrix(cbind(tmp1, tmp2))
})
# Resubstitution (Lr) vs leave-one-out CV (Ld) QDA error on every
# replicate, growing the feature set one column at a time.
aList <-
  foreach(i = 1:montc, .combine = 'rbind') %:%
  foreach(j = 1:ncDim, .combine = 'rbind') %dopar% {
    dat <- sc[[i]][, 1:j]
    fit <- qda(truC ~ dat)
    resub_lab <- as.numeric(predict(fit)$class) - 1
    # CV = TRUE returns leave-one-out class assignments
    cv_fit <- qda(truC ~ dat, CV = TRUE)
    cv_lab <- as.numeric(cv_fit$class) - 1
    n_obs <- length(truC)
    Lr <- sum(resub_lab != truC) / n_obs
    Ld <- sum(cv_lab != truC) / n_obs
    rbind(
      data.frame(Type = "Lr", value = Lr, Run = i, Dim = j),
      data.frame(Type = "Ld", value = Ld, Run = i, Dim = j))
  }
# (earlier melt-based reshaping kept for reference)
#m1 <- melt(qList[, 1:2])
#m1 <- melt(qList)
# Same comparison plot; note jitter is drawn beneath the boxplots here.
p2 <- ggplot(aList, aes(x = Type, y = value)) +
  geom_jitter(aes(color = Type), alpha = 0.5, size = 0.25) +
  geom_boxplot(notch = TRUE, alpha = 0.5) +
  facet_grid(. ~ Dim) +
  ggtitle("1st normal (0.5,-0.5), rest normal(0,0)")
show(p2)
# Captured knitr output from the original run:
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
# FIX: the extracted source fused three statements onto one line
# ("pairs(...)plot(...)mu1 <- 1/2"), a syntax error -- split them.
pairs(sc[[1]][, 1:10], col = alpha(truC + 1, 0.2), pch = 20, cex = 0.4,
      xlim = c(-2, 2), ylim = c(-2, 2))
# Quick look at the informative first coordinate of replicate 1.
plot(sc[[1]][, 1])

# Third experiment: scale up to nc = 5000 points, ncDim = 500 dimensions.
mu1 <- 1/2
# Large-scale version: nc = 5000 observations, up to ncDim = 500 columns
# (first informative, remainder N(0, 1/128) noise), montc = 100 replicates.
ncDim <- 500
montc <- 100
nc <- 5000
nc0 <- nc1 <- nc / 2
truC <- rep(c(0, 1), times = c(nc0, nc1))
# fixed seed so this expensive simulation is reproducible
set.seed(317)
sc <- lapply(seq_len(montc), function(rep_i) {
  tmp1 <- c(rnorm(nc0, mean = mu1, sd = 1), rnorm(nc1, mean = -mu1, sd = 1))
  tmp2 <- sapply(seq_len(ncDim - 1), function(k) rnorm(nc, mean = 0, sd = sqrt(1/128)))
  as.matrix(cbind(tmp1, tmp2))
})
# Error rates on the first 50 replicates, sampling the dimension grid
# j = 1, 50, 100, ..., ncDim rather than every dimension.
bList <-
  foreach(i = 1:50, .combine = 'rbind') %:%
  foreach(j = c(1, seq(50, ncDim, by = 50)), .combine = 'rbind') %dopar% {
    dat <- sc[[i]][, 1:j]
    fit <- qda(truC ~ dat)
    resub_lab <- as.numeric(predict(fit)$class) - 1
    # CV = TRUE returns leave-one-out class assignments
    cv_fit <- qda(truC ~ dat, CV = TRUE)
    cv_lab <- as.numeric(cv_fit$class) - 1
    n_obs <- length(truC)
    Lr <- sum(resub_lab != truC) / n_obs
    Ld <- sum(cv_lab != truC) / n_obs
    rbind(
      data.frame(Type = "Lr", value = Lr, Run = i, Dim = j),
      data.frame(Type = "Ld", value = Ld, Run = i, Dim = j))
  }
# FIX: the extracted source fused "ggtitle(...)show(p3)## notch..." onto
# one line, a syntax error -- show(p3) belongs on its own line.
#m1 <- melt(qList[, 1:2])
#m1 <- melt(qList)
p3 <- ggplot(bList, aes(x = Type, y = value)) + geom_boxplot(notch = TRUE) +
  geom_jitter(aes(color = Type), alpha = 0.5, size = 0.25) +
  facet_grid(. ~ Dim) + ggtitle("1st normal (0.5,-0.5), rest normal(0,0)")
show(p3)
# Captured knitr output from the original run:
## notch went outside hinges. Try setting notch=FALSE.
# Pairwise scatter of the first ten coordinates of replicate 1, colored by
# class, axes clipped to [-2, 2]. NOTE(review): `alpha` presumably comes
# from the scales package (loaded elsewhere) -- confirm.
pairs(sc[[1]][, 1:10], col = alpha(truC + 1,0.2), pch = 20, cex = 0.4, xlim = c(-2,2), ylim = c(-2,2))